{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Imports"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd \n",
    "from pyspark import SparkContext\n",
    "from pyspark import SparkConf\n",
    "import pyspark\n",
    "from pyspark import SparkConf, SparkContext \n",
    "sc = SparkContext.getOrCreate()\n",
    "from pyspark.sql import SQLContext\n",
    "sqlContext = SQLContext(sc)\n",
    "from pyspark.sql.session import SparkSession\n",
    "spark = SparkSession(sc)\n",
    "from pyspark.sql.functions import *\n",
    "from pyspark.sql.functions import to_date\n",
    "\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Load Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_traffic = spark.read.parquet(\"/Users/pprusty05/workspace/Data_mining/Project/data_folder/traffic_weather_data/Traffic.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- EventId: string (nullable = true)\n",
      " |-- Type: string (nullable = true)\n",
      " |-- Severity: string (nullable = true)\n",
      " |-- TMC: integer (nullable = true)\n",
      " |-- Description: string (nullable = true)\n",
      " |-- StartTime: timestamp (nullable = true)\n",
      " |-- EndTime: timestamp (nullable = true)\n",
      " |-- TimeZone: string (nullable = true)\n",
      " |-- LocationLat: double (nullable = true)\n",
      " |-- LocationLng: double (nullable = true)\n",
      " |-- Distance: double (nullable = true)\n",
      " |-- AirportCode: string (nullable = true)\n",
      " |-- Number: integer (nullable = true)\n",
      " |-- Street: string (nullable = true)\n",
      " |-- Side: string (nullable = true)\n",
      " |-- City: string (nullable = true)\n",
      " |-- County: string (nullable = true)\n",
      " |-- State: string (nullable = true)\n",
      " |-- ZipCode: integer (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df_traffic.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+--------------+--------+\n",
      "|          Type|   count|\n",
      "+--------------+--------+\n",
      "|         Event|   44870|\n",
      "|Broken-Vehicle|  518269|\n",
      "|  Lane-Blocked|  400565|\n",
      "|  Construction|  514021|\n",
      "|    Congestion|20294005|\n",
      "| Flow-Incident| 1179094|\n",
      "|      Accident| 2168224|\n",
      "+--------------+--------+\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df_traffic.groupBy('Type').count().show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_traffic_type=df_traffic.groupBy('Type').count().toPandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3deZwcVbn/8c+XLQFBEBIWyY1hUQIuRAyKqCwqq+AVRFHWCBhU9KeCsigiCiKiiF4VlDWABBBBrmFfA4KEJZdFFBSULQISIktUAgG+vz/O6aTT6Ulmpnu6qrqf9+s1r+k6VdPz1Cz9dJ065zmyTQghhFA2SxQdQAghhNBMJKgQQgilFAkqhBBCKUWCCiGEUEqRoEIIIZRSJKgQQgiltFTRAYTQKkn3A0/mzbGAgPvy9uq2xw7yedcAzgeWBIYBmwH7AvvlQ04CPgfsbPvBATzvpBznHGB1YC3glrr4N7H98GBiHgxJ+wN7AnNJ5/kocJrtqzsVQwjNRIIK3eBJ21vAvBf/pWzvkbentvC8nwPutz1R0t6AgR8CbwBeALYGNrP97CCe+xO2H5Y0ATi6If6OkXQUsCmwne3ZkpYAjgQuAFYa4u89BnjItoby+4TqigQVusFhg9y3OKOARwBsnynpDaTk93jef/4gn/cE4B+D2NdWktYBvgZsYHs2gO1XJX0T+GgnYghhUbryHpSkjSU9mN+d9uf4LSTdL2lq3cffJR04xKGGNrB9y6L2SZoo6WFJ50k6RdJd+Xe8vKSzJV0t6XeSTpK0FICkI4DtgAn52M3JCSlvf1fS8ZKerf87k7S7pGmSrssfH2gS0922X+gj3ruBn0iypFtyjFtJ+pukOyQdIelJSadKuljS7ZIulTSiLoZt8tfeIGmKpNf38ePZCXjM9p8bYjCwSd3zbZyf68b8eePcfmCO5ci8/a36n0ddrD/NP+d7JZ2Z960InFf385wqacm+fo+hR9nuqg/SP91kYDowoZ9f815gt4a2O4ExRZ9PfAz49z8J+GWT9iNJ96lGkt6YfQ9YGdij4Wv3bdg+sm57DPn1u65tau3vjNRV9iQwMm/vAkxaTLwTgBlN2u+p/5skvZgvXxfXg8AKeftkYHJ+vBYwG1gvbx8AXNPH9z4JuHkx8a0IPA1skbffl7dX6uNnNO/nUbf/TtK9reHALODdff084yM+6j+68Qrqdtu7kf5J+8X2TbYn17YlvQWY7Q7eqA4dcYvtmbZftX0I8AzwBkk35XtVWwDvaOH5PwVcZntm3r6YlAQG4yxgLwBJI4E5tv9Vt/9S52454Gxgl3wFshtwh+dfFU0GPpAHfAzGDsDztqcC2P4d6ef24QE8x/W2X7Q9B3iAlERDWKyuS1C2Z/S1T9JeufvlBkmTJb22j0N3B345NBGGAj3XsL03sD/wYadBCpOA5Vp4/lFALTlh+2Xbtw7yuc4BtsyJZTdyd1idZ+oezwKWBkbkGDaodZsBvyHdR1utyfd4EFhzMXEscE7ZzNzeX8/XPZ4DLDOArw09rOsSVF8kvYc0AmtH25sDf8/bjccJ2Bn4VWcjDAV4J3Cb7X/m7aVbfL7HSF2IAEhaStKGg3ki208A15PeLG0FNA75Xrnu8QjSEPGncwx32N6i9gG8HfhDk29zEbCmpDfVN0paQdKVkoY3nlM2Eqi9EXyJ1H1XM6Qj/0Jv6ZkERerrn1LX/TIZ2D0npHrvA/7gwQ0dDtXyILChpGF5cMRCAxoGaBKwfd2AhV1Jf3eDdRZwEPAX26807Nta0gr58V7Ar/Mx5wLvyiMOkbQq6b7QQv/rth8CvgmcKGn5fPzSpJGE9+YuuUuAFSRtlve/B3gd8Nv8NA8Bb8n71gXeOIDzm52/bjlJh0raZHFfEHpLLw0zn9f1kbeXIg3nXYX0zrMmuvcqStJxwLbpoY6zfXBu342UKIZLOsv2XvlLfkF6Q3IXcC/wBLBtHr25fH6uOZLeSBquPm/UGfCdvH8ccKikmbYvlfQVYIqkF0l/V/ssIt7dgEOBkZKusr11wyG/AX5OusfU6FrgjJyIZpLvV9l+KD/vZElzgVeB/W3PbRaD7WMkzQSukTSHdDV0KXBM3v+8pG2B45XmSJk0Z6r2Bu504MOSpgE3AnfUfh7AenU/w7uBt9b9vB61fZ2kyfnrngF+3NfPKvQm2d25YGF+EZlke1LePhmYa/uAumNG2H66bnsZ4I/Am22/1NmIQ1hQTghX2t6qoX0S8LDtI4uIK4RO6aUuvknAhyS9DkDSesCUhmO2A66O5BSKJOld+b7QNix87ymEntF1XXyS3gEcz/yuhA/b3tn27yUdDlwu6T+km7t7N3z57sCPOhtxCAtZgzSKbwbw3/U7lCYQ17rNHrN9WgHxhdARXdvFF0IIodp6qYsvhBBChXRNF9+IESM8ZsyYosMIIYQwQNOnT3/aduN8u+5JUGPGjOGOO+4oOowQQggDJOmRZu3RxRdCCKGUIkGFEEIopUhQIYQQSqkj96Byfa8DgI8AIlUzPtz2tX0cvztwIKmsyg3AVxzj4UMIFTd37lxmzJjBnDlzig6lEMOHD2fUqFEsvXT/6jJ3apDEmsAXgXG2n5O0FfC/ktaz/ff6A/NaTMeTClD+k1Rz7HPAzzoUawghDIkZM2awwgorMGbMGBauU93dbDNr1ixmzJjBWmv1b0mwTiWo2cARtp8DsH11Lky5KXBBw7H7khZ9expA0unAwUSCCqGUxhx6ace+18PHfqhj32sozJkzpyeTE4AkVlllFWbObFxerG8duQdle5bteRWZ8xIXy7DwQmgAGwP3123/CXizpGWHNsoQQhh6vZicagZ67kXNg9qctMrnjU32rcaCK58+S7pvNYK0eNo8kiYCEwFGjx49JIGGEMJQaffVZ9WvMBt1fBRfXqXzGGCC7Vf7OKzZgIiFUq/tk22Ptz1+5MiFJiGHEELooIcffpiLL764bc/X0QSVu/ZOBk6wPb2Pw55iwWWjVyIlrP53XIYQQui4Sico0ui822xfkJfZbtYvdztpJc6aDYA/2n6hIxGGEEIX+/a3v83hhx/OUUcdxS677MLjjz/Ofvvtx3HHHcenP/1pbr75ZubMmcOECROYMGECACeffDK1WqdTpkxh3XXX5ctf/jL77bcfm266KQ8//DAvvfQSkyZN4q677uLII4/k/vvv7zuIfupYgpJ0COme1yRJywPrAPtIGiHpd5JWzoeeCmwvaZW8ougE0rLXIYQQWnDllVcybdo0jj76aL7xjW+w3XbbcdBBB7Htttty8MEHc/TRR7PrrrsybNiweckJYOLEifMe77jjjrz3ve9l7NixnHrqqey0005ceOGFLLPMMkyYMIFx48Zx5JFHMnbs2Jbj7UiCyquDHgt8gTTkfDZpaXWAZYGxwHIAtu8FvgJcBdwK3AWc2Ik4Qwihm91zzz2su+6687b33Xdf7rnnHtZee20AVlttNZ577jmefvrpxT7Xm970JgBGjhzJ7NmzhyTejozis/0XmgxyqLPACAfb55BWFA0hhNAmG264IVOnTp23ffrpp7Phhhvy17/+lY022ognn3ySlVZaiREjRrDCCivw/PPPA/Doo48u9FzNhowvueSS2GbOnDk88sgjrLfeegsdMxBds9xGCCFUTaeHhW+99dZMmzaNww47jOHDh7PKKqvwgx/8gK9//es88MADPPjgg5x33nlIYty4cbz66qscddRRjBkzhueee45zzz2XddZZh3vuuYezzz6b9dZbjylTpvDMM8/w4IMP8uY3v5kZM2Zw0EEHscMOO7ScoLpmyffx48c71oMKofOikkT/3Xfffay//vpFh1GoZj8DSdNtj288NqqZhxBCKKVIUCGEEEopElQIIXRQt9xWGYyBnnskqBBC6JDhw4cza9asnkxSteU2hg8f3u+viVF8IYTQIaNGjWLGjBkDWnKim9QWLOyvSFAhhNAhSy+9dL8X6wvRxRdCCKGkIkGFEEIopUhQIYQQSikSVAghhFKKBBVCCKGUIkGFEEIopUhQIYQQSikSVAghhFKKBBVCCKGUIkGFEEIopUhQIYQQSikSVAghhFLqaIKStLGkByVNWMQxW0i6X9LUuo9dOhhmCCGEEuhYNXNJOwEfA57rx+HH2p40tBGFEEIos05eQd1uezdgdge/ZwghhIrqWIKyPWMAh39E0vWSfifpcEmxblUIIfSYMg6SeA64BdgK2A74IPC9ZgdKmijpDkl39OoKlSGE0K1Kl6Bs32n7e7Zftv0vUnL6jCQ1OfZk2+Ntjx85cmTngw0hhDBkSpegmngUWA6IDBRCCD2kdAlK0v+TNLyuaTXgJWBWQSGFEEIoQOEJStKIPBhi5dy0EfDxvG9J4AvAZNuvFBVjCCGEzutYgpL0DklTgXHAoZIuyruWBcaSuvEATgE+Iel6YBowE/hip+IMIYRQDh0bvm17OrBFk/bHqLu/ZPtmYPtOxRVCCKGcCu/iCyGEEJqJBBVCCKGUIkGFEEIopUhQIYQQSikSVAghhFLqd4KS9PYmbV+QtH57QwohhBAGdgV1fJO2PwCntSmWEEIIYZ7FzoOStFl+uJKk9wH1RVuXBVYcisBCCCH0tv5M1D0zf14dOKth3/PAiW2NKIQQQqAfCcr2WgCSTre9z9CHFEIIIQzgHlRfyUnSl9oXTgghhJD0uxafpBHAvsBawDJ1u7YFftTmuEIIIfS4gRSLvZi0HPs9wJy69hfbGlEIIYTAwBLUkrY/1Ngo6f/aGE8IIYQADGwe1A2SRjdp36hdwYQQQgg1A7mC2gQ4QNKfScPLa8YB32prVCGEEHreQBLUasDnG9oErNG+cEIIIYRkIAnqYNtTGhslPdLGeEIIIQRgYPOgFkpO2SZtiiWEEEKYZyDzoE7vY9e2wHfbE04IIYSQDGQU35bAI3Ufz5FG8F0wBHGFEELocQO5B/Vl2xfXN0gaDpzc3yeQtDFwLnC07UmLOG534EDAwA3AV2x7ALGGEEKouIHcg7q4SfMrwEILGTYjaSfgy6Qrr0Ud9xbS2lPbAO8kXaV9rr9xhhBC6A4DuQd1XUPTMOCNwOX9fIrbbf9G0tTFHLcvcJntp/P3PR04GPhZf2MNIYRQfQPp4lsDOLZu+yXgIdvT+vPFtmf08/tsDPy2bvtPwJslLWv7hX4+RwghhIobSIL6nO3rhyyS+VZjwW7AZ0kTgkcAj9UfKGkiMBFg9OhmVZhCCCFU1UDuQV0v6T2STpV0Rf686RDF1WxAhBY6yD7Z9njb40eOHDlEoYQQQihCvxOUpL1IQ8pfBabnzxdI2qPNMT0FrFS3vRIpYc1s8/cJIYRQYgPp4vsM8Fbbs2oNklYBpgC/bGNMtwPr1W1vAPwx7j+FEEJvGchE3bn1yQkgb89tJQBJIyT9TtLKuelUYHtJq0haApgA/LyV7xFCCKF6BpKgZko6QtIoScPy58NJXXKLJekdeYj5OOBQSRflXcsCY4HlAGzfC3wFuAq4FbgLOHEAcYYQQugCAxrFB5wDHMn8QQxXA3v254ttTwe2aNL+GDCyoe2c/L1CCCH0qEUmKElLke4BAfzB9laSXg+MIi0Bf8tQBxhCCKE3La6LbzfgDuAQYBkA24/bvg3YV9K0untHIYQQQtssLkHtAXzY9u62X6zfYXs/4BfA94YquBBCCL1rcQlquO0r+tpp+wwWHBIeQgghtMXiEtTL/XiOV9sRSAghhFBvsQlK0lp97ZS0NmnJjRBCCKGtFpegvgNcJ2lfSaMlLZk/RkvajzTM/IihDzOEEEKvWeQwc9s3SPosaS2mxpVzHwD2sX3zUAUXQgihdy12oq7tKyStS1rZdp3c/CBwZyzDHkIIYaj0q5JETkTT80cIIYQw5AZSiy+EEELomEhQIYQQSikSVAghhFKKBBVCCKGUIkGFEEIopUhQIYQQSikSVAghhFKKBBVCCKGUIkGFEEIopUhQIYQQSikSVAghhFLqWIKSNFzSJEnTJN0haes+jpsg6S5JU+s+3tupOEMIIZRDv4rFtsmRgGxvIulNwDRJ69v+R5Njv2R7agdjCyGEUDIduYKStASwH3AagO2/AHcCe3Ti+4cQQqieTnXxrQ2sAtxf1/YnYHwfx+8v6Ybcvfe5IY8uhBBC6XSqi2+1/Pm5urZngQ2aHPsP4CpgEjASmCppSds/aTxQ0kRgIsDo0aPbGW8IIYSCdXoUX+MKvFroAPty22c4eQr4CdD0Ksr2ybbH2x4/cuTIIQg3hBBCUTqVoJ7Kn1eqa1uprn1RHgXe0PaIQgghlFqnEtRfgX8C69W1bQDc3nigpIMbmlYDHh+60EIIIZRRRxKU7VeBU4B9ACS9ERgHnCNpfUnXSloyH76dpM3zccsB+wNndyLOEEII5dHpeVA/lzQtf99P2n5S0hhgLLA08ApwPHBEHpq+PHAt8N0OxhlCCKEEOpagbM8BJjRpnwasWbd9CXBJp+IKIYRQTlGLL4QQQilFggohhFBKkaBCCCGUUiSoEEIIpRQJKoQQQilFggohhFBKkaBCCCGUUiSoEEIIpRQJKoQQQilFggohhFBKkaBCCCGUUieLxYYQQijAmEMv7cj3efjYD7X1+eIKKoQQQilFggohhFBKkaBCCCGUUiSoEEIIpRQJKoQQQinFKL4QQmhQ1VFv3SauoEIIIZRSJKgQQgilFAkqhBBCKXUsQUkaLmmSpGmS7pC09SKOPUjS9Pzx1U7FGEIIoTw6OUjiSEC2N5H0JmCapPVt/6P+IEnbAp8GxuWmuyT9yXZn7lpWXLfd3O3U+UD3nVPcgA9V15ErKElLAPsBpwHY/gtwJ7BHk8P3BybbnmN7DnAO8JlOxBlCCKE8OtXFtzawCnB/XdufgPFNjt24n8eFEELoYrI99N9Eeg9wEzDc9ou57ShgU9sfaDh2LrCD7Svz9geAq2wv2eR5JwIT8+Z6wJ+H7izmGQE83YHv00nddk7ddj7QfefUbecD3XdOnTyfN9ge2djY6Ym6jdlQ/Tyu+UH2ycDJLUU0QJLusN1VV3Tddk7ddj7QfefUbecD3XdOZTifTnXxPZU/r1TXtlJde+OxjcfNHKK4QgghlFSnEtRfgX+SuuFqNgBub3Ls7f08LoQQQhfrSIKy/SpwCrAPgKQ3koaRnyNpfUnXSqrdY/o58Mk8b2o4sFtuK4uOdil2SLedU7edD3TfOXXb+UD3nVPh59ORQRKQJuqSEs1Y0r2vr9m+StImwIXAOnlYOZIOIiUmgPNsf78jQYYQQiiNjiWoEEIIYSCiFl8IIYRSigQVQgihKUlrFvn9I0ENUi7fFEpO0g5Fx9BOkj5RdAztJmmLomMIIOn8Js2H9NHeEXEPqh8k/dj2Fxvafg08ZvvLBYXVMknvIpWgugpY0fasgkMaEEl79eOwQ21vMOTBtJmkZYCdgLWAZep2TbC9djFRtS7/zTWeU1V/R4fZ/m5D20XAzbaPLyisQZN0ne33N2m/2fZ7iogplnzvn7c2NtjeRdLvigimVZLWA/4XWJ00MfodwJWSvmH78kKDG5gfA3flxwLeBfyFNOduFWB94JZiQmvZb4DRwL3AnLr24cWE0zpJk4AdgAdY8JxWLySg1m0FLJCgbO8s6WagMglK0kOk6j2rS/pbw+7lSH+DhYgEtQiSrif94sZJuq5h93JU9+f3U+BA25dJut72bEmbA5cCVUpQZ9v+fwCSfgh80fbdtZ2SxlHdSvirA29zQxeHpE8XFE87vANYs1aPs0bSNwqKZ1AknZ4fjq17XLMc8NoOh9SqCaQ3eCcAX2rYNxu4u/ELOqWqL7CdMil/XgM4s2HfbKAxaVXF0rYvy48NYPvfkirV31tLTtl42wc27L9L0ps7HFa73A6sADzf0F7le593Aq80ab+n04G06JH8+cW6xzWzga93NpzW2L4BQNKueSmkBUgay4IrTHRMJKhFsH0mpEtg2zc27pe00sJfVQmStHntDzM3VL3I5bKSPg5cYNt5EMuuwLIFxzVYKwJ/lHQrCyapbYFfFBNSy54Dbs09E/XnNIHU5VwJtr8FIOk+278qOp52sf0XSasDY1jwHuGPgI2KiCkGSfRTfsFblQV/cefZ3rSgkAZN0jtIXXmzSfdqHiOV1t/e9p1FxjZYOcFeBIwEnmV+MeKdbU8vMrbByPcFJjXZNcH2Wh0Opy0kPQFc0WTXNrZf3+l4hoqkK2xvW3QcAyXpO8BXgVkseI9wNdvLFRFTXEH1g6TPAseR+pfrlwipZHa3PV3SusCOwJqkBHWJ7dnFRjZ4tu+QtA7wblKX7BPALbbnFhvZoB1j+5TGRkkLdcFUyIm2j2pslFTJkbCS/hv4HumKY+laMxV9XQB2AUbbfrK+UdLPCoonrqD6Q9IDpNqA99Tf4JV0oe2PFhdZe0n6lO0zio6jFVUfOl8vX7VvCowivYm4JRderjRJo0jnNMP2jKLjGaz8uvA10j202hWHgHNtv7uwwAZJ0q9sf7xJ+7ts31pETHEF1T/32m625MeBTdpKSdIR/ThsAlDJBNUwdP4fwHiqOXQegHyFOwV4A/AM8DrgYUk72v5rocENkqSRwLlAba6N8+jY3WxXcc23v9i+oLFR0jZFBNMG90uaAlzJgvcIDyUte9RxcQXVD5IOJs1JuYIFf3E/sl3IzcOBWkT/f73K3guQdDVwQt3Q+S0lvQa41PYWBYc3YJIuB35Jejf+ar6a+gSwp+3tio1ucPLk9jtJb4Jmku7p7g1sZHuXImMbDEmfI3X7N74uVPXe9DPMn1dYb0PbK3c6HogrqP76FvAk8KGG9tUKiGWwmvb/16vafJQGXTF0vs6yts+pbeSuvcmSJhYYU6tG2P5O3fbfgWMkTS0onlb9NH8+rqG9qn9zk20f0Ngo6XtFBAPVnlPRSZfZXqvxA1jo8r6s6pOTpFUkbSZpRN5eVdLbFpfASk55snF9Q5WHzi+dB33MI2ltqv2mcpikBd6J5+1hBcXTqhtsL9H4AVxTdGCDUZ+c6n9Ptg8pJqLo4utJkk4lDSs/yPZfJa0BnAb81naZVi/ut24bOi9pW9IboFtJ3WEjSffVdrFdyRfAXDvxOOAS5p/T9sBXbE8uMrbBkLR0hUeJLkTS8qRqEruTRsG+k3QfdM+i7ntGguoHSUsBhwB7Ai8Dm5NWB/5MFUeJSboR2Ly+jE6+x3Gj7fcWF1lrJL2WVOutNurtEuBfjeWCqiJfMX2C+edzru2HCw2qRZLeR/o/qp3TJNtVrZeIpN1I51O7R/g14Bu11cGrRNIvSd2u5wH/Y/t9efDRCba3LyKmKncXdNIPSfebDgUOtj1L0onAKcDOhUY2OGp80c434iv5Qg4g6Zt5hv/khvapwBZFxNQq238Djqlvq/pUANu/AxYosixpu4qOtDwC2IZ0pbub7Wck/YlU6WPvQoMbnDVt7wEgaS6A7T/nyvqFiATVPxva3hxA0v8DsH29pErV3KpzX+7mm8T8rpYJwH0FxtSqvSSdYPt5AEkrkqqdv6/YsPpP0ma1klqLmBYwgQpNBZC0dk60i1oe5VCqVaS45oPAZrZfyZN2sX2GpD0Kjmuwhkta2fY/aw25nNtrigooElT/LCNpWMMk3WHA8gXG1Iovk/qarybdoJ4DnAUcVGRQLXoJODWPctsMOJH0Tr2wSsyDcIik/7P9L+CzNJ8WULXlNn4j6X35jUP98ij1qrrcxpK2a8VvDfO6ygspC9QGJzF/LtRakn5CGrl8eFEBRYLqn4uA/5N0HrCGpIOAjwGFrTTZCtv/BiZK2p909TQzF1jVYr60zD5Aug9wOWly6+dsXyypMvO6bNdPYzje9g8aj5H0lQ6G1DLbG9ZtnmZ7ofglLXSeFXGzpGtIKx28TtJHSRVnri82rMGxfZakv5K6J+8j1R3dvch7hJGg+sH29yU9TvrFGdga+LHtc4uNrDX5PtRTdU3XMn+Wf+lJGt2k+SDgm6RuzNGkG76VmzTJ/Npu8yiv1lpALO3yx8aGfGP+kgJiaYevkQZPfYM06OM7pGT1/SKDaoXtm2n4G5M03vYdRcQTo/j6QdJets8qOo5WSLoC+JjT4oSvsvBkQpFy1pKdj25w6s6jsYDvAttVOqcalXD57VY1OydJywHX2d6koLDaTtJStl8uOo7+kLRZPw4rrGJOXEH1zw8lvRE4y/YDRQczSEd5frXyW0lDYuuJVCetSm6wveWiDpB0ZaeCaQd132qt9QM+xjQZ/LEcae2rbnIV1emJuJJUJQfSm7tRwAvAP4GVSffZGxdl7JhIUP1zKWko6cQ8N+UaUr2tZ4oNq//ypXvNjrafbjwm1xyskq0Xd4DtqhXu7KrVWrPa+lXL1j2umQ18qrPhDF4fvQ9VNqVWwVzSYaTC2FNqO/PoxI2LCi66+AYoj9LZgTQK7i5XdLkN9bFyZlWK3zZSWrPrc8BE27dIGkcaybev7coNn5f0cXfRaq2QpmjY/p+i42iFpFtIvQ9i/npq55OuOEaQBkk8Zbuw+nWDtYhu5alFFVyOBNUPkr5g+ydKaw3tRfoDfYLU5ddYKLL0NH/lzKdJ79RrCls5s1WSbgI+afuxura1gZNtf7C4yAZH0ttJL4C/tv0nSW8iTQr9mSu6JpSkZYGRth/NI0b3ApYk/R9V5Z7N620/nh9f1qzCgqQrK3jljqS7gU/bvq2u7d3AL2y/rYiYoouvfw5TKq2/MmlU2Nau4DLidUq3cmYbvFSfnCBVYpBUuQES2VHAdaRyQAAzgBVIc1X2LyqoFv0PsLKkT5LeIO1NOq9NgEpUaa8lp2y0pBVtP1drkPQ60n2cKjoUuFbSk6Q3ryNJS6IstIhhp0SC6p+5wMHA5VV5p7cYdzcmp6zKIxUl6b22b6prqGxdQdJyGz+sbdj+D2lpikrOscneaHuLfPU0EdjC9kOSClmttQ0mA3+SdBnzX9C3JyXiyrF9uaS1SJNzVyf1El1aZL3RSFB9yC8EJiWmXVy3om4emvkt0hDmSozWaSgzc7+k35JGG5Vi5cw2OAi4QtLzzC/ftAKwbaFRDd5yTaqXDKd6lSTq1Zb3+QBpNdqH8vbsPo4vNdvHSAgMoVEAABLwSURBVLqL1CMxjvSCvo/txS0MWlp58NSZ9W2SDrP93SLiiXtQfVBelTU//ibzF8H7dt0xF9j+WEEhDoj6Xi2zXmErZ7aDmlQzrxtaXym5WsnewK+Yn3A/Dpxh+4QiYxusXGD5DcBbSVdQN+TPH7S9Y5GxtVOVit/muoGTc7HoxmkNNdu6oJW24wqqb/WZeypp1N6XFnFM2TVdLbOeClw5sx1sP6+0lMgoYEZVkxOA7eMlPUEqDltLuN+tePWSA0hXtM/YnpbfUDxLhWpAdmHx2/eQSrn9B9iSVEC60YtN2joirqD6UH8F1Ww7t1XmCqovjdWLq0rSqqR7ArUuV5MGGexme2ZhgbWZpIWWSqmaPBp2FVIX84pF3uMYqDzS7X35zVBfvRKV7ImQtIftX/a3vRNiyfdFULJEnvu0wHatrYokLS/pFEn/AW5XWgL+92pYYrxiTiQV6RxFqtA+Om+fVGRQQ+DaogMYLEnrSbqfVL3gR6SJu1dK2q7YyPrP9oa5Mjuk4rdbNn4AfXWVld3NkvaS9AYASW+QtGNRyQniCqpPTWaMiyZdehWt81a6lTNb1ddkwiInGbZiURULqvg3ByDpatLf2GW1HglJryGNFNui4PAGLA9aWZW0GsALSmuQLVPVK3ZJ5wP/AI6x/aSklYFjgX/Y/kYRMcU9qL7dzcL3nOoJOLIzobRd6VbObINhjd2V+R9sWIExtaKxXuLrSEOYq9wdu7Tty/Lj2qCjf6u6KzkfQ1p7bB/gHtI8yV9JOsb2bwqNbHBWs71rbSP/L03M93ULEQmqb1+1fcOiDpBU2EJeLSrdypltcBJpTsolzB/1tj1QqfWT6mxpe07d9iPAXfn8fl5QTK2SpM3r/68kjS8yoBa9C3i37dqbvIfy3LtrgComqGZLvCxBgXkiElQfbF/Tj2NuWtwxJVW6lTNb5bTY2kPAnsCGpFFvH3WBi621aFUtuH7kMOAtwNhiwmmLA4HLJc0GVpH0B1L9ukp2KwNza8mpxvaLuXu2im5Uqv5/NvPf5O1JGsVciLgH1SMkXWv7A3Xb7yHNs6kNYZ5U4RfzPkl6u+07i45joJq8yM0F/gZ83fZFBYTUFnloea3IatXnqv0K+ANwBvNf0CcAb3OuEF4luSzYISw4tWES8P2iKuhEguoRub7WXcD/AefbvrvgkFqmki+21gpVtODookjaz/apRcfRLpJGAr8EtmL+gJargT2rOlCibCJB9QhJX7Z9gqSNgF1J3WC3Ab+yfW+x0Q2OpBdI5WW0iMMqWaE9l9qaZvuwomNpl9wFezoL/74MPAVca/vBjgfWIklrkq8IbT9RdDyDJen9wB7ASbZvl/Q2Ui/LN23/q5CYIkH1LknvJJXPeTMwjZSsKrN2kqQLvZj1uPpzTBlJutP224uOo50kXUiqw3cDaTTiKqRKBleTVm59N7C/7V8XFmQbSJpke0LRcQyUpGtI8wn/1/YructvV+AjRXVZVnayaWid07ovpwG3k25gV+pKqj+Jp4rJKbs9D5NfgKRJBcTSLv8BNrD937Y/ZfvDwNuAf9vegVRwtTKrOkvaRNIVku6X9Lf88RAFLk/RKtsX2X4lP37F9mTSQJZCxCi+HiHpjbYfyI/XJb0z2pV09fQg8FPSyqCVorQk9brAVNvT89D/z5P+tq8FDnCT5e0rYAXgXqUVXJ+ra1/sMvcltrYXXE8J23+XNDY/fkxSIV1Jg3QGqcvyHqA2JUCkup1V9Jo+5hIWVkE/ElTvOF/SeaSkNA54CLgA2LuKo9wAJP0A+CIwC/iOpMOAj5Jmv78C7AR8H/hUYUEO3ibAL5q0F1a4sw1ekXQwcA7z10/aE3gVUikkUldfVcyw/f3GRkkfLiKYNvgp8EdJl1KSuYRxD6pH5GHLDwG/Jt1rqvKKwABI+iuwje0H8wTJS4C1bD+T9y8D3Gx74yLjHAxJn7Z9SpP2T1a1ormktUkFfd/J/FFvtwG7A8+Q6vNdUZXzU6pm/m/gyvpBBJJ+Y3un4iIbPEmbMn+Y+aOktaF2t/35IuKJK6je8XvbC60wK2kv21VdSffJ2qgv2zdJur+WnHLbSxXrMqq3UL+/pIuAmwuIpS2clqnYJBcjXR14wvajdYfsXUxkgzYpf3bdpOqmNTurwvbvgd/nOoPbk8q97UjqNu+4GCTRI5olp2xCJ+Nos8burv80OaaqLxZbNTbY3hnYuYBY2sr2I7ZvrSWnCg/8uBVYC1i74eO2IoMaLElLS/qwpHNIRWNPI3WVP77orxw6cQUVFjWHqOw2lVT/DnzVhm1I/eiVofmrmo7VwiucLge8tsMhtY2kTUgFlscAtcLEAlajmm+UPm77scZGSZUp3SRpKWAb0sjDjwAvkxYw/BvwTttzizyfSFBhUtEBtOAh0oCIvohUuqVKHsmfX6x7XDMb+Hpnw2mryo96k/RD2wdCGnXYx2HfIE3bqIKZpDwwhTRg5fKclK6rK4J72aKeYChFgupxts+sPZZ0rO1Di4xngL6zuMXUJBVSQ2ywbH8LQNJ9tn9VdDxt1g2j3naTtLh5QVtRnQT1eaD281+KkvWoxCi+HpTnQR1E6j+vXwNqnCu4VHUjSVvbvqroOFohaTVgPeAm0u/oEGBJ4Aeev6JrpXTDqDdJTwBXLOawbWy/vhPxtEsu4vvfpO6+f5GmOWxk+1VJ29m+vJC4IkH1Hkl3kUaDNXa1HGJ7/cICa5PcPfH+ouNoRZ6z9nfgUOC7pDJAfwZea3uXImMbrLoK7QutVO2KrBIs6Qe2FzkvqD/HlJnSysAfISWrl4FNba9bSCyRoHqP+l4efWPbtxcQUlspLydedBytqC2Pkm9iP0q6un1K0k2LGJFZarkqxicam4Fzbb+7gJDaRtKqtp8qOo52k/Q64Lqi6kLGMPPedImkZi8I+3Q8kqGxuC6YKlg2f94RuLXuxW9uH8dXwcfzEPP6j4ep7oKF9c4rOoChkOcVbl7U949BEr3pAODoPIm1dj+jNtz3s4VF1Qb53s3lSjMnl6gVvqygKyT9kfQ7+VC+R/At0rIUlZRr7b2HVHpqFDADOD1PDq26Ug0uaKci73nGFVRvepbUv/xR0otF7ePPRQbVCkmrS7qatD7Ub4AVgemSKlfmCMD2t4FdSF17t5KunC4GvlBoYC3IgyQuINXem54/XyBpj0IDa48niw6gG8U9qB7U170mSetXaT2oepL+l9S1dx5wke0tJa0KTLb9wWKja5+qrjUEIOn3wI62Z9W1rQJMsb1pcZENjqTdbZ9TdBzdLLr4elBeLXM08EngNaTJrlvavrTYyFryWtsnAUgyQB5UUMleglx14ZssPBVgdapZdQFgbn1yArA9S1JV76v9UNJ44DLSci9VPY/SigTVg/IaSqeR5tisDRwN7CLprbYXVZmhzIZJWtd1S4bnJLzMIr6mzCpfdaGJmZKOIJ1XbTmHCVT3vtr5pCkAHwJOzQVjbwQutR1dfm0QXXw9SNJNwC62n6wfki3pBtuFjdhphaRtSPc3bgc2AO4A3gXsZvuaImMbDElX216oYKyk/1pEiZ1Sy12u55CWfa+98FwN7Gl7ZmGBtUFey+ojpBWBV7Qdb/7bIH6IvemVbnuHZ/tKSRuSui3/DDwGfCEPY66isyV9lIaqC8D/kBZirIQ8j2uDvPkH21tJej1pFN+Stm8pLrrW5DdF2+eP1UgrOB9C6vILbRBXUD1I0oWkrqPTgbNIo/n2Js0Y/1iRsbWbpE/YrtwclW6ougDzRu6dSrq63cf2i3X7TgXeAmxfv8x4VeSeiPHAKcAR9WuRhfaIBNWDJI0kdbXURreZ1NWyh+2nCwusBXn13J1YeFDBBNtrFxPV4HVL1QVJVwE/tN108rSkT5HeGH26s5G1R660sC2wJenv7hbSPagZhQbWJSJB9bC6rpbHbD8h6e227yw6rsGQdCkwGriX+YMKoIKFO6Hve02SXleld+qSbrS9WavHlFF9EVVJ40hVwb8IvMb28EKD6xJxD6oH1apH236cvFpmvgI5AxhXaHCDtzrwNje845JUyXfmXVR1oT/Lnby6+ENK6YS8VMgOwNLA5cBngCsLjaqLVHKOSGjZ5pLeVtvIc27uJt0PqKrbgRWatFfyb7yLqi68LGmtvnZKWpu0rHgVrUIaLr8LsIbtT9m+oKrLoZRRXEH1ppuAgySdwvwBEocBGxUaVWtWBP4o6Vbm1xeEdH/gF8WE1JLPAG9tVnUBWOQijSXzHeA6SUeT7nP+PbevCWxN+rvbq6DYWvUZ2xcWHUQ3iwTVg2x/WNLSwK+BVUldYzMkbVFsZC3ZhDRarNGLTdqqoCuqLti+QdJngZ8BJzfsfoA0su/mzkfWOtsXdkk3bGlFguoRucuo0RTgK6Rq2S+QFsfboMlxVXCM7VMaGyX9pYhg2qBrqi7YviKv4rwRsE5ufhC4s/GeYZXk/6ljgUtI3bAjSd2wh9iu0lVuacUovh4h6RngrsUctmE3LPleT9Kxtg8tOo6B6uaqC92i24rfllFcQfWOybYPWNQBkr7XqWDaTdLypDIzGwHL1e0aR7oyLL1urrrQpbqiG7bMKjnCKQxcs+QkaZSkTSStmY85pPORtc0vSKPB1gHOBCYDs4DfFhnUAO1GqiF4CHmyse3Hbd8G7CtpmqSuusKtuJmSjsj/R8Py58OpYDdsWUUXXw+qrZMEvD83GbiOVFi1kt1Hkq6z/X5JU21vUdf+a9u7FBhav3V71YVuE92wQy+uoHrTicD1pK6jYaQKDNcDJxUZVItqc2mWkPRfAPlq4219f0npDO8rOQHYPgNYr4PxhEWw/VSuOP9fwLuBUba3jeTUPpGgetMI29/J3Udzbf/d9jHAiKIDa8FfJH0eOJs0H+o20kixqYVGNTDdXHWha+X/n9tsPwEg6cdFx9QtYpBEbxomaeX6CtL5amNYgTG1pP4eWx5avgVwINVasPBlSWvZfqjZzopXXegKkq5b3CHAhqSafKFFkaB600nAfZKmMH+OzfakOVGVlyeH3kkaLPHTouMZgG6uutAt1iCtoqs+9os0yCW0QQyS6FGS3kt6sRtFWtxvUjcOY5Z0QZXWuJK0LanqwpiGXQ8A+9u+oeNBhXkkTQeeAw62fUcfx+xoe0pnI+tOcQXVgySdDjxoe2LRsbRC0kq2ny06jnbq1qoLXeR52+8HkHQGefSe7X1qB0Ryap9IUL1pU7qjj/w3kt5P390tlZQT0fT8Ecql/k3Ct4BzgV0LiqXrxSi+3jQdeKmxUdKxBcTSis1JI9/mLuJj58KiC91o3psh2w8Dc2w/usAB0gc6HVS3iiuo3jQLuE3SNaT+9JqPU5GyQNndwJcWsV/AkZ0JJfQKSaIuUTVuA4cD13Y6rm4UgyR6kKQngGYTQiu1PLqkD9q+ZjHHvNf2TZ2KKXQ3Sa+yYDefGrYBsL1kx4LqYnEF1ZtOtH0UgKRlSYVI/yXpywXHNSCLS075mEhOoZ36c9V+Qodi6XqRoHqEpGWAzwLrkrsf8oq6n0oPdQfwieIiDKESvrq4of6SvtqpYLpddPH1CElnAjuQ5tOsT1rd9K3Az0nlcz4GLGf7o4UFGUIIdSJB9QhJDwCb5PVq1gNuA1azPSfvXwK4xfa7iowzhBBqYph573iytria7T+TFsSbU9tp+1XgP0UFF0IIjeIeVO94uWE47Nwmw2NDCKE0oouvR8Tw2BBC1cQVVO+I4bEhhEqJK6ge0c9JrYs9JoQQOiUSVAghhFKKUXwhhBBKKRJUCCGEUopBEiF0iKT7gSfz5ljSwJT78vbqtscWElgIJRUJKoTOedL2FgCSJgFL2d4jb08tLqwQyim6+ELonMMGuS+EnhQJKoQOsX1Ls/a85MnJkl6QdGpu+6qkpyUdL+kiSXMkfVvSFZL+IOkESUvWPcdXJU2T9DtJP8nV60OotEhQIRTM9gvAB0j/j0fm5h8DN9o+yPbOpHtXbwK2AzYG3g/sCyBpd2Cf3LYZsBpwcAdPIYQhEQkqhBKw/RRwNbBHbvoQcHnDYec7mQP8mvnrd00AzrP9H6eJjecCew591CEMrRgkEUJ5nAV8EzgW+Dhpgcl6z9Q9ngWskR+PAnaTtGXeHk5a4yuESosEFUJ5/Bb4haRtgFdtP9uwf+W6xyOAJ/Ljx4CrbX+/tlPSiCGNNIQOiC6+EEoid91dAJwBnN/kkI8qWZa0AvJ5uX0S8DFJwwHyldQvhj7iEIZWXEGF0GGSjgO2TQ91nO36AQ1nATuz8P0ngD8AlwH/BVwDnAZge7KkNYAbJP0beB6YOISnEEJHRLHYEEpE0vrA520f0ND+MDDB9tQi4gqhCNHFF0IJSPqkpKVII/LOLDicEEohElQI5bARcCcwwvZt9TskXQSsDvxI0juKCC6EIkQXXwghhFKKK6gQQgilFAkqhBBCKUWCCiGEUEqRoEIIIZRSJKgQQgil9P8BPQTT1OTBodcAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "from matplotlib import rcParams\n",
    "import matplotlib.pyplot as plt\n",
    "ax = df_traffic_type.plot(x=\"Type\", y=\"count\", kind=\"bar\")\n",
    "ax.set_xlabel(\"Type\", fontsize=12)\n",
    "ax.set_ylabel(\"Count\", fontsize=12)\n",
    "ax.set_title(\"Traffic Type Count\")\n",
    "rcParams.update({'figure.autolayout': True})\n",
    "plt.rc('xtick', labelsize=12) \n",
    "plt.rc('ytick', labelsize=12) \n",
    "plt.rc('font', family='serif')\n",
    "#plt.xticks(rotation=90)\n",
    "plt.tight_layout()\n",
    "plt.savefig('/Users/pprusty05/google_drive/Data_Mining/Project/plots/Traffic_Type.pdf')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Clean Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Missing Values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "def find_missing_values(df):\n",
    "    missing_value_dict = {}\n",
    "    for column_name in df.columns:\n",
    "        missing_value_count = df.where(df[column_name].isNull()).count()\n",
    "        missing_value_percent = (missing_value_count / df.count()) * 100\n",
    "        missing_value_dict[column_name] = {}\n",
    "        missing_value_dict[column_name]['count'] =  missing_value_count\n",
    "        missing_value_dict[column_name]['percent'] =  missing_value_percent\n",
    "\n",
    "    missing_value_pd = pd.DataFrame(missing_value_dict).T\n",
    "    return missing_value_pd "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "missing_value_traffic_pd=find_missing_values(df_traffic)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>percent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>EventId</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Type</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Severity</td>\n",
       "      <td>4739024.0</td>\n",
       "      <td>18.866256</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TMC</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Description</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.000016</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>StartTime</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>EndTime</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TimeZone</td>\n",
       "      <td>26621.0</td>\n",
       "      <td>0.105979</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>LocationLat</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>LocationLng</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Distance</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>AirportCode</td>\n",
       "      <td>45108.0</td>\n",
       "      <td>0.179577</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Number</td>\n",
       "      <td>14818638.0</td>\n",
       "      <td>58.993629</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Street</td>\n",
       "      <td>11.0</td>\n",
       "      <td>0.000044</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Side</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>City</td>\n",
       "      <td>714.0</td>\n",
       "      <td>0.002842</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>County</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>State</td>\n",
       "      <td>1.0</td>\n",
       "      <td>0.000004</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>ZipCode</td>\n",
       "      <td>20263.0</td>\n",
       "      <td>0.080668</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  count    percent\n",
       "EventId             0.0   0.000000\n",
       "Type                0.0   0.000000\n",
       "Severity      4739024.0  18.866256\n",
       "TMC                 0.0   0.000000\n",
       "Description         4.0   0.000016\n",
       "StartTime           0.0   0.000000\n",
       "EndTime             0.0   0.000000\n",
       "TimeZone        26621.0   0.105979\n",
       "LocationLat         0.0   0.000000\n",
       "LocationLng         0.0   0.000000\n",
       "Distance            0.0   0.000000\n",
       "AirportCode     45108.0   0.179577\n",
       "Number       14818638.0  58.993629\n",
       "Street             11.0   0.000044\n",
       "Side                0.0   0.000000\n",
       "City              714.0   0.002842\n",
       "County              0.0   0.000000\n",
       "State               1.0   0.000004\n",
       "ZipCode         20263.0   0.080668"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "missing_value_traffic_pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "19650384.0"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "missing_value_traffic_pd['count'].sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Drop columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Drop the description, timezone, airport cdoe\n",
    "df_traffic = df_traffic.drop('Description')\n",
    "df_traffic = df_traffic.drop('Number')\n",
    "df_traffic = df_traffic.drop('TimeZone')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Drop rows with null at Airport Code, City, State, Zipcode, street\n",
    "df_traffic = df_traffic.filter(df_traffic.AirportCode.isNotNull())\n",
    "df_traffic = df_traffic.filter(df_traffic.City.isNotNull())\n",
    "df_traffic = df_traffic.filter(df_traffic.State.isNotNull())\n",
    "df_traffic = df_traffic.filter(df_traffic.ZipCode.isNotNull())\n",
    "df_traffic = df_traffic.filter(df_traffic.Street.isNotNull())\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>percent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>EventId</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Type</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Severity</td>\n",
       "      <td>4730955.0</td>\n",
       "      <td>18.868222</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TMC</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>StartTime</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>EndTime</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>LocationLat</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>LocationLng</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Distance</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>AirportCode</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Street</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Side</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>City</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>County</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>State</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>ZipCode</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                 count    percent\n",
       "EventId            0.0   0.000000\n",
       "Type               0.0   0.000000\n",
       "Severity     4730955.0  18.868222\n",
       "TMC                0.0   0.000000\n",
       "StartTime          0.0   0.000000\n",
       "EndTime            0.0   0.000000\n",
       "LocationLat        0.0   0.000000\n",
       "LocationLng        0.0   0.000000\n",
       "Distance           0.0   0.000000\n",
       "AirportCode        0.0   0.000000\n",
       "Street             0.0   0.000000\n",
       "Side               0.0   0.000000\n",
       "City               0.0   0.000000\n",
       "County             0.0   0.000000\n",
       "State              0.0   0.000000\n",
       "ZipCode            0.0   0.000000"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "missing_value_traffic_pd=find_missing_values(df_traffic)\n",
    "\n",
    "missing_value_traffic_pd\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Add start date column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+\n",
      "|          StartTime|\n",
      "+-------------------+\n",
      "|2019-09-29 15:49:24|\n",
      "|2019-09-29 15:38:00|\n",
      "|2019-09-29 15:33:00|\n",
      "|2019-09-29 15:48:00|\n",
      "|2019-09-29 15:50:00|\n",
      "|2019-09-29 15:59:00|\n",
      "|2019-09-29 15:47:00|\n",
      "|2019-09-29 14:33:00|\n",
      "|2019-09-29 15:50:00|\n",
      "|2019-09-29 15:52:00|\n",
      "+-------------------+\n",
      "only showing top 10 rows\n",
      "\n",
      "+-------------------+\n",
      "|            EndTime|\n",
      "+-------------------+\n",
      "|2019-09-29 17:19:04|\n",
      "|2019-09-29 17:05:30|\n",
      "|2019-09-29 17:43:34|\n",
      "|2019-09-29 16:33:23|\n",
      "|2019-09-29 16:36:57|\n",
      "|2019-09-29 16:40:14|\n",
      "|2019-09-29 23:26:14|\n",
      "|2019-09-29 17:16:16|\n",
      "|2019-09-29 16:54:57|\n",
      "|2019-09-29 16:34:33|\n",
      "+-------------------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df_traffic.select('StartTime').show(10)\n",
    "df_traffic.select('EndTime').show(10)\n",
    "df_traffic = df_traffic.withColumn('StartDate', to_date(df_traffic.StartTime))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+----------+\n",
      "| StartDate|\n",
      "+----------+\n",
      "|2019-09-29|\n",
      "|2019-09-29|\n",
      "|2019-09-29|\n",
      "|2019-09-29|\n",
      "|2019-09-29|\n",
      "|2019-09-29|\n",
      "|2019-09-29|\n",
      "|2019-09-29|\n",
      "|2019-09-29|\n",
      "|2019-09-29|\n",
      "+----------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df_traffic.select('StartDate').show(10)\n",
    "df_traffic = df_traffic.withColumn('EndDate', to_date(df_traffic.EndTime))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- EventId: string (nullable = true)\n",
      " |-- Type: string (nullable = true)\n",
      " |-- Severity: string (nullable = true)\n",
      " |-- TMC: integer (nullable = true)\n",
      " |-- StartTime: timestamp (nullable = true)\n",
      " |-- EndTime: timestamp (nullable = true)\n",
      " |-- LocationLat: double (nullable = true)\n",
      " |-- LocationLng: double (nullable = true)\n",
      " |-- Distance: double (nullable = true)\n",
      " |-- AirportCode: string (nullable = true)\n",
      " |-- Street: string (nullable = true)\n",
      " |-- Side: string (nullable = true)\n",
      " |-- City: string (nullable = true)\n",
      " |-- County: string (nullable = true)\n",
      " |-- State: string (nullable = true)\n",
      " |-- ZipCode: integer (nullable = true)\n",
      " |-- StartDate: date (nullable = true)\n",
      " |-- EndDate: date (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df_traffic.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#We will take care of the severity later\n",
    "#Write\n",
    "df_traffic.write.parquet(\"/Users/pprusty05/workspace/Data_mining/Project/Traffic_cleaned.parquet\")\n",
    "#df_traffic = spark.read.parquet(\"/Users/pprusty05/workspace/Data_mining/Project/Traffic.parquet\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read df_traffic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_traffic = spark.read.parquet(\"/Users/pprusty05/workspace/Data_mining/Project/data_folder/traffic_weather_data/Traffic_cleaned.parquet\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Weather data Saving in Parquet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "#df.withColumnRenamed('StartTime(UTC)', 'StartTime').withColumnRenamed('EndTime(UTC)', 'EndTime').write.parquet(\"/Users/pprusty05/workspace/Data_mining/Project/WeatherRaw.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_weather = spark.read.parquet(\"/Users/pprusty05/workspace/Data_mining/Project/data_folder/traffic_weather_data/WeatherRaw.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['EventId',\n",
       " 'Type',\n",
       " 'Severity',\n",
       " 'StartTime',\n",
       " 'EndTime',\n",
       " 'TimeZone',\n",
       " 'LocationLat',\n",
       " 'LocationLng',\n",
       " 'AirportCode',\n",
       " 'City',\n",
       " 'County',\n",
       " 'State',\n",
       " 'ZipCode']"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_weather.columns"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Clean Data"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## missing values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [],
   "source": [
    "missing_value_weather_pd=find_missing_values(df_weather)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "41866.0"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "missing_value_weather_pd['count'].sum()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## remove null rows"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_weather = df_weather.filter(df_weather.City.isNotNull())\n",
    "df_weather = df_weather.filter(df_weather.ZipCode.isNotNull())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>percent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>EventId</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Type</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>Severity</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>StartTime</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>EndTime</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TimeZone</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>LocationLat</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>LocationLng</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>AirportCode</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>City</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>County</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>State</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>ZipCode</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             count  percent\n",
       "EventId        0.0      0.0\n",
       "Type           0.0      0.0\n",
       "Severity       0.0      0.0\n",
       "StartTime      0.0      0.0\n",
       "EndTime        0.0      0.0\n",
       "TimeZone       0.0      0.0\n",
       "LocationLat    0.0      0.0\n",
       "LocationLng    0.0      0.0\n",
       "AirportCode    0.0      0.0\n",
       "City           0.0      0.0\n",
       "County         0.0      0.0\n",
       "State          0.0      0.0\n",
       "ZipCode        0.0      0.0"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "missing_value_weather_pd=find_missing_values(df_weather)\n",
    "missing_value_weather_pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Add start date column"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "+-------------------+\n",
      "|          StartTime|\n",
      "+-------------------+\n",
      "|2019-07-07 07:08:00|\n",
      "|2019-07-07 20:56:00|\n",
      "|2019-07-08 17:22:00|\n",
      "|2019-07-08 21:56:00|\n",
      "|2019-07-08 22:06:00|\n",
      "|2019-07-08 22:11:00|\n",
      "|2019-07-09 08:20:00|\n",
      "|2019-07-09 10:09:00|\n",
      "|2019-07-09 10:36:00|\n",
      "|2019-07-09 10:54:00|\n",
      "+-------------------+\n",
      "only showing top 10 rows\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df_weather.select('StartTime').show(10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_weather = df_weather.withColumn('StartDate', to_date(df_weather.StartTime))\n",
    "df_weather = df_weather.withColumn('EndDate', to_date(df_weather.EndTime))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- EventId: string (nullable = true)\n",
      " |-- Type: string (nullable = true)\n",
      " |-- Severity: string (nullable = true)\n",
      " |-- StartTime: timestamp (nullable = true)\n",
      " |-- EndTime: timestamp (nullable = true)\n",
      " |-- TimeZone: string (nullable = true)\n",
      " |-- LocationLat: double (nullable = true)\n",
      " |-- LocationLng: double (nullable = true)\n",
      " |-- AirportCode: string (nullable = true)\n",
      " |-- City: string (nullable = true)\n",
      " |-- County: string (nullable = true)\n",
      " |-- State: string (nullable = true)\n",
      " |-- ZipCode: integer (nullable = true)\n",
      " |-- StartDate: date (nullable = true)\n",
      " |-- EndDate: date (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df_weather.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 63,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_weather.write.parquet(\"/Users/pprusty05/workspace/Data_mining/Project/Weather_cleaned.parquet\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Read Weather"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_weather = spark.read.parquet(\"/Users/pprusty05/workspace/Data_mining/Project/data_folder/traffic_weather_data/Weather_cleaned.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['EventId',\n",
       " 'Type',\n",
       " 'Severity',\n",
       " 'StartTime',\n",
       " 'EndTime',\n",
       " 'TimeZone',\n",
       " 'LocationLat',\n",
       " 'LocationLng',\n",
       " 'AirportCode',\n",
       " 'City',\n",
       " 'County',\n",
       " 'State',\n",
       " 'ZipCode',\n",
       " 'StartDate',\n",
       " 'EndDate']"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_weather.columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_weather_type=df_weather.groupBy('Type').count().toPandas()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAagAAAEYCAYAAAAJeGK1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+j8jraAAAgAElEQVR4nO3de5wcVZ3+8c9DDAQWdiMkRjSLAREC6BoxKAIKArJcvKF4A9QIGBRXhR+CsF6WFXYXVLysK2pEjCCIy643RLkIJFyjJIKg3AQNEAUJgYSsErnk+f1R1aTT6UlmMjNV1ZPn/Xr1a7pOne76Vs9MfbtOnTpHtomIiGia9eoOICIiopskqIiIaKQkqIiIaKQkqIiIaKQkqIiIaKQkqIiIaKQkqOgZkn4qyZJukDSlLNta0pOS/rZc3l7STZLulPSKIdruFpLmSBq2ezIkLZY0q3wslnRP2/Ks4dpuH7GMk/Q1SddKml3u+ymSnldlHBFJUNEzbO8H/AH4rO2byuLdgVHAbmWdW4GvANNtX78225E0qT0Z2b4XePtgYu+Hm2zvYXsP4CbgnLblykjaGLgK+JXtXW3vDrwe2B84poLtnyRp5nBvJ3pDElT0mtnAHm3LuwIXd5S9HJhTXUhD4sS1XDfUPgIstn1Gq8D2gxXHEAGM0AQlaSdJd0maNoDXrC/p9LJZY56kqyQ9axjDjLUzi+KsqWVj4MKOsg1sL5M0WtJnJF0n6RpJ/yJJAJJ2kHSRpMskXS9peln+d8D55fNWE9uo1htLerekn0m6Q9IubeXPkvS98u/mOklvLMtfVjY5zpd0XPl+yzt3ag1new9I+q2kRySdUr7v6ZIWSfpo+Z6WdKKkSyX9RtIJbbH1+Tl0cRBwaZfyy4CPle+ncl/mlO93lqRNynWXlrFMkrRhe9No+T/WivUDkn4i6W5JB5br3wZMA/Yt631sNZ9JrAtsj6gHcCBwHjAPmDaA130Z+EDb8leALevenzxW+T1tAxh4FjAJOA3YAXiCIlltDfxrWfdjwBUUTYCjgeuAQ8t1LwdeXj4fDdwGvKBcnlT8a6y03Unldt9aLh8PXNK2/jLgU+XzzYFFwKRyeQ/gcWDvcvkza9jHWcApHWU7AouBMeXyBOAbbesNnFY+3xS4H9hnTZ9Dl20/Brx3DfG9E/gNsFG5fGaXWFr73u2zNHB8+fytwB1t604CZtb9d5ZHMx4j8QzqBtsHA0v7+wJJE4DXUSQlAGy/3/bvhyG+GATbd1IcfHcvH7OBWykO3rsBr6I4wEPxbfxbtp+y/QRwAcXBFeC3wOGSrqNILpsDL+lHCBeXP38FbAkg6bnA3sA3yhjvB64B3tH2uj/b/lm5/riB7HP5ml8CC4A3lkUHU57ptTm/rPsw8BNWXDebRt+fw9p4F/Bd238pl78JvLP9TLMfWp/jzZSfY0SnZ9QdwFCzvaCvdZLeBRwF/JXiYvv7bD9KcaCbD/yLpL2BPwMn2756+COOtdC6DjUaONa2JV1F8XvcDPhOWW8i8P8kvadc3pgikQF8DhgLvNL2U2VPuY3WtOHy7wWKv6H127YDcHZb54pxwC1tL13S773r29kUyeF8YC/gix3rH2l7vgh4UVt8fX0One4CnruGOCYCC9uWF1L8LiYAf1zDa1tan+Oy8rURqxhxCaovknalOChtZ3uhpM+Uy0dQNEO8Avie7V0lvQq4VNJk2/fUFnT0ZRbwIeAW260z5dkUZyx32X6sLLuPoqnsAgBJ61EkJYCXAWfYfqpcHsxB8r7y50G2F5bbGsPQ/399Gzi5/BJ1m+3Oa1mbUnzRgiJB3t8WX1+fQ6fvUvTYO6m9UNKbKf53Tinfb3zb6vEUTax/KpefADYon/e1nYg1GolNfH2ZBlzYOoBQXKc6pLxYvAHwFMV1KGxfBdwIHFJDnLFms4HtgXs7yl7GigM0wEzg4Lamp3dTXuinOFN4OYCkzYF/aHvd0rJ8I0knSNp5dcHY/iNFM2F7s9lXgVf3e4/6odzObIozqXO6VDkIQNJmFEmm1QQ4k74/h06fAzaS9P5WgaRJwL8Dl7S931slbdj2fue0JfvfAy8sn+/fr51bYWm5fUn6/gBfGyPMupSgJgJ7asWNj1+i+Ma3GUXTyMNl+3zLAlY03USD2L4deIDiYN1yC0Uz2qy2ss8AdwDXSroS2BP4RLnueGA7SdcD/0ZxUD1B0p62F1F8gbmKointPlb07PuhpC2ALwDPlnR2+X6HArtIulrS1cCdti+UtH1b3VmSNl3dvpXvN4Xims7/61LlbGCh7Zu7rPuTpEuAq4Ev2m71xlvd57CS8rrSnsCOZe/GKymurR1p+4ayznkUSeoKSddQnCke3fY2/wx8WtJPKJsSW70hJbViOr+jx2Sr/IcUnV6uKx+xDpM9MicsLJPQTNszy+UZwBO2P9BWZ5zthyTtBvwM2Nj2k+W6K4HLyyaNiEaQtB/wQtuf6Sg3Ra/T+bUEFjEM1qUzqJnAAZKeCSBpW4r7Z6D4pnYXxbfg1rqdWLWXVEQtyg4+UPyNnltnLBFVGXFnUJJeCpxO0UzyAHCr7TeV6w4F/gn4C8V9KR8quy0j6QXADGBM+Vafsv3TisOP6ErSfwMvoLiO+sm28vUpbqzdHfg58Gbbf6gnyoihNeISVEREjAzrUhNfRET0kBFzH9S4ceM8adKkusOIiIgBmjdv3kO2x3eWj5gENWnSJObOnVt3GBERMUCSug6IkCa+iIhopCSoiIhopCSoiIhopBFzDaqbJ554ggULFrBs2bK6Q6nFmDFjmDhxIqNHZ7DoiOg9IzpBLViwgE022YRJkybR9wSiI5NtFi1axIIFC9hyy0y3ExG9Z0QnqGXLlq2TyQlAEpttthkLFy5cc+WIWMmkEy6qZDvzTz2gku30qhF/DWpdTE4t6/K+R0TvG9FnUJ2G+ltRvv1ERAyfSs6gJI2WdHQ5J8zscp6Zvfqou4ek21vzNpWPg6qIsxfNnz+fH/zgB3WHEREx5Ko6g3ou8GFgiu0lkl4D/FDStn2MvHxqax6nWL1WgnrjG99YdygREUOqqmtQS4FP2l4CYPsyYBmwS0Xbr9WnPvUpPv7xj3PyySdz0EEH8cc//pEjjjiCT3/607z3ve/l2muvZdmyZUybNo1p06YBMGPGDFpjC1544YVsvfXWHHPMMRxxxBHssssuzJ8/n8cff5yZM2dy0003cdJJJ3H77bfXt5MREUOskgRle5Htc1rLKq7erw/01cXsjZKuLKfP/riknr1WdskllzBnzhxOOeUUPvGJT7Dffvtx7LHHsu+++3L88cdzyimn8La3vY0NNtjg6eQEMH369Kefv+51r2O33XZj8uTJnHnmmRx44IH87//+L+uvvz7Tpk1jypQpnHTSSUyePLmGPYyIGB519eLbHbgHuKrLuiXA9cBrgP2AvYHTur2JpOmS5kqa29Tu1DfffDNbb73108uHH344N998M1tttRUAEyZMYMmSJTz00ENrfK9tttkGgPHjx7N06dLhCTgioiEqT1CSxgD/Dkyzvbxzve0bbZ9m+0nb/0eRnN6nLn2mbc+wPdX21PHjVxmpvRFe/OIXc/fddz+9fNZZZ61U9sADDzB27FjGjRvHJptswqOPPgrAvffeu8p7des2PmrUKGyzbNky7rjjjmHai4iI6lXadFYmmRnA523P6+fL7gU2AsYDDw5m+3V0C99nn32YM2cOJ554ImPGjGGzzTbjs5/9LB/72Mf47W9/y1133cX555+PJKZMmcLy5cs5+eSTmTRpEkuWLOE73/kOz3/+87n55ps555xz2Hbbbbnwwgt55JFHuOuuu9hhhx1YsGABxx57LK997WvZdtttK9/HiIjhUOmU75I+B/zO9n9J2gCYYPvejjofAmbYXlYu7wn8FNjI9lN9vffUqVPdOR/UbbfdxnbbbTfUu9FT8hlEDFxGkqiWpHm2p3aWV9bEJ+mjFGdsMyVtDDwfOEzSuLIzxKZl1R2Bt5avGQV8EDhvdckpIiJGnqpu1N0GOJUi2SwtH78pV28ITKZoxgP4OvB2SVcCcyh6+n24ijgjIqI5KrkGZftOYHUDw41vq3stsP8QbnudHZOuyubbiIihNqIHix0zZgyLFi1aJw/Urek2xowZU3coERFrpWdvgO2PiRMnsmDBgnV2yonWhIUREb1oRCeo0aNHZ7K+iIgeNaKb+CIionclQUVERCMlQUVERCMlQUVERCMlQUVERCMlQUVERCMlQUVERCMlQUVERCMlQUVERCMlQUVERCMlQUVERCMlQUVERCMlQUVERCNVNaPuaElHS5olabak6yXttZr6h0iaJ2mupNO1rs44GBGxDqtquo3nUkzbPsX2EkmvAX4oaVvbf2ivKOmFwOnAC4GHgcuBo4AvVxRrREQ0QFVNfEuBT9peAmD7MmAZsEuXuocDP7H9kO3lwFnA+yqKMyIiGqKSBGV7ke1zWstlk936QLepbncCbm9bvhXYQdKGwxtlREQ0SV2dJHYH7gGu6rJuArCkbXkxIGBcZ0VJ08vrVHPX1WndIyJGqsoTlKQxwL8D08omvG7c7aWrVLJn2J5qe+r48eOHMsyIiKhZpQmqbNqbAXze9rw+qj0IjG1bHkuRsHKKFBGxDqn6DOp04Be2L5C0gaQtutS5Adi2bXl74De2H6skwoiIaITKEpSkj1J0a58paWPg+cBhksZJulrSpmXVM4H9JW0maT1gGvDVquKMiIhmqOpG3W2AU4EPUnQ5Xwr8ply9ITAZ2AjA9q+BjwCXAj8HbgLOqCLOiIhojkpu1LV9J106ObRZqYeD7XOBc4c1qIiIaLSMxRcREY2UBBUREY2UBBUREY2UBBUREY2UBBUREY2UBBUREY2UBBUREY2UBBUREY2UBBUREY2UBBUREY2UBBUREY3U7wQl6SVdyj4oabuhDSkiImJgZ1Cndym7BfjGEMUSERHxtDWOZi7pVeXTsZJeycqjkm8I/N1wBBYREeu2/ky38a3y57OBszvWPUrmaoqIiGGwxgRle0sASWfZPmz4Q4qIiBjANai+kpOko/v7HpJ2knSXpGmrqbOHpNslzWp7HNTfbURExMjQ7xl1JY0DDge2BNZvW7Uv8IV+vP5A4C3Akn5s7lTbM/sbW0REjDwDmfL9BxTJ5WZgWVv5X/v5+htsf1/SrAFsMyIi1lEDSVCjbB/QWSjpl/15se0FA9jWGyW9myK+SyjOqJ4cwOsjIqLHDeQ+qNmStuhSvuNQBVNaAlwPvAbYD9gbOK1bRUnTJc2VNHfhwoVDHEZERNRpIAlqZ+A3ZUK4ovUAPjyUAdm+0fZptp+0/X8Uyel9ktSl7gzbU21PHT9+/FCGERERNRtIE98E4J86ygRsPnThdHUvsBEwHnhwmLcVERENMZAEdbztCzsLJd0zhPEg6UPADNutjhgTgMeBRUO5nYiIaLaB3Ae1SnIq7TyYACSNk3S1pE3Loh2Bt5brRgEfBM6z/dRgthMREb1lIPdBndXHqn2B/+jH619KMeDsFOAESa+3/SaK8fwmUzTjPQx8HfiYpPcAGwM3Ah/pb5wRETEyDKSJ79XAzLblsWXZBf15se15wB5dyu+juL7UWr4W2H8AcUVExAg0kAR1jO0ftBdIGgPMGNqQIiIiBnYN6gddip8CVpnIMCIiYrAGcg3qio6iDYAXAD8d0ogiIiIYWBPf5sCpbcuPA7+3PWdoQ4qIiBhYgjrK9pXDFklERESbfico21dK2hV4DzARWACcZfu64QouIiLWXf3uJCHpXRRdypcD88qfF0g6dJhii4iIddhAmvjeB7zI9tNDDknaDLgQ+PZQBxYREeu2gYxm/kR7cgIol58Y2pAiIiIGlqAWSvqkpImSNih/fpyMMB4REcNgIAnqKOCVFNNf/AW4B9itLI+IiBhSq70GJekZwPbl4i22XyPpORS9+EbZvn64A4yIiHXTms6gDgbmAh8F1gew/UfbvwAOlzSnbZqMiIiIIbOmBHUo8Hrbh9j+a/sK20cAX6OYkj0iImJIrSlBjbF9cV8rbX8T2HZoQ4qIiFhzgnqyH++xfCgCiYiIaLfGBCVpy75WStqKYsqNfpG0k6S7JE1bQ71DJM2TNFfS6ZLU321ERMTIsKYE9W/AFZIOl7SFpFHlYwtJRwCXAZ/sz4YkHQgcAyxZQ70XUkwN/4/Ay4AdSVf2iIh1zmoTlO3ZwPuBfwZ+TzHFxuPl848Ah5VTtPfHDbYPBpauod7hwE9sP2R7OXAWxTBLERGxDlnjWHy2L5a0NcWZzPPL4ruAG227vxuyvaCfVXcCftS2fCuwg6QNbT/W3+1FRERv69dgsWUimlc+htsEVm4GXAwIGAfc115R0nRgOsAWW2xRQWgREVGVgQx1VKVuZ2ardJSwPcP2VNtTx48fX0FYERFRlSYmqAeBsW3LYykS1sJ6womIiDo0MUHdwMo3/24P/CbXnyIi1i0DmbBwWEgaB3wfeIPth4EzgcvKyRAfAaYBX60vwt4y6YSLKtnO/FMPqGQ7EbHuquwMStJLJc0CpgAnSPpeuWpDYDKwEYDtX1N0Yb8U+DlwE3BGVXFGREQzVHYGZXsesEeX8vuA8R1l5wLnVhNZREQ0UROvQUVERCRBRUREMyVBRUREIyVBRUREIyVBRUREIyVBRUREIyVBRUREIyVBRUREIyVBRUREIyVBRUREIyVBRUREIyVBRUREIyVBRUREIyVBRUREIyVBRUREI1U5YeEYSTMlzZE0V9I+fdSbJukmSbPaHrtVFWdERDRDlVO+nwTI9s6StgHmSNrO9p+61D3a9qwKY4uIGLEmnXBRJduZf+oBQ/p+lZxBSVoPOAL4BoDtO4EbgUOr2H5ERPSeqpr4tgI2A25vK7sVmNpH/SMlzS6b944a9ugiIqJxqmrim1D+XNJWthjYvkvdPwGXAjOB8cAsSaNsf2lYI4yIiEapuhefO5a1SgX7p7a/6cKDwJeArmdRkqaXHS7mLly4cBjCjYiIulSVoB4sf45tKxvbVr469wLP67bC9gzbU21PHT9+/CBDjIiIJqkqQd0NPAxs21a2PXBDZ0VJx3cUTQD+OHyhRUREE1WSoGwvB74OHAYg6QXAFOBcSdtJulzSqLL6fpJ2L+ttBBwJnFNFnBER0RxV3wf1VUlzyu2+w/YDkiYBk4HRwFPA6cAny67pGwOXA/9RYZwREdEAlSUo28uAaV3K5wDPbVv+MfDjquKKiIhmylh8ERHRSElQERHRSElQERHRSElQERHRSElQERHRSElQERHRSElQERHRSElQERHRSElQERHRSElQERHRSElQERHRSElQERHRSElQERHRSElQERHRSFXOB9U4k064qLJtzT/1gMq2FRExEuQMKiIiGqmyBCVpjKSZkuZImitpn9XUPVbSvPJxXFUxRkREc1Q95bts7yxpG2COpO1s/6m9kqR9gfcCU8qimyTdaru69riIYVRV03KalaPXVXIGJWk94AjgGwC27wRuBA7tUv1I4Dzby8pp4s8F3ldFnBER0RxVnUFtBWwG3N5WdiswtUvdnYDvdNQ7avhCiyZLR5aIdVdVCWpC+XNJW9liYPs+6nbWe9YwxRURg5QvETFcZHv4NyLtClwDbGD78bLsZGBX23t21H0COMD2peXyXsCltkd1ed/pwPRycVvgjuHbi6eNAx6qYDtVGmn7NNL2B0bePo20/YGRt09V7s/zbI/vLKzqDOrB8ufYPp531h3btjwWWNjtTW3PAGYMUYz9Immu7W5Nkz1rpO3TSNsfGHn7NNL2B0bePjVhf6rqZn438DDFWU7L9sANXere0M96ERExglWSoGwvB74OHAYg6QUU3cjPlbSdpMsltZrwvgq8o7xvagxwcFkWERHrkKrvg/qqpDnldt9h+wFJk4DJwGjgKdsXS9oBuLZ83VkNuweq0ibFioy0fRpp+wMjb59G2v7AyNun2venkk4SERERA5Wx+CIiopGSoCIiopGSoCIiopGSoAZB0sfrjmFtSBox92pENI2ko+uOYW2VHdQaI50k+iDprH5U29f2c4Y9mCEm6ZfAhwF1rDLFjdK/LW8N6HmSLra9b91xDJWRtj8Akr5r+211xzFQksYBhwNbAuu3rerJ4wKApN8Bx1KM3vPnuuNZp2fUXYNXAzPL5+OB1wAXU9xwvBlwAPDjWiIbvGcCVwJ/ptifTYExwB8phjd5QNKbbd9cX4gDI+n1wGkUB4vRrWKKpNtzJL2BYn8m0cP7Ux7wVluFFWN19pofUIwbejOwrK38r/WEMyQepMgLX5C0EfBz4CLbd9cRTBJU3062fRYU3/CAqbaXtlaWzXvfqiu4Qfo28AvbF7YKygP8drZPk/Qm4IsUSbpXfA44juJg8WRZJlYeGb+XfBb4Z1Y++PXi/iwBVtfkJeDzFcUy1EbZXmX02rKFolftUraeXFBOk/R+4OeSFtrerupgkqD60EpOpee1J6dy/aOSevI0HtjN9ifaC2z/qGw7P8329yR9uKbY1tYttr/fWSjpXXUEMwTutH1BZ6Gkf6wjmEGYbnu1Q5X14N9ay2xJW9i+t6N8R+DCbi/oARtJeg2wP7AfRcvKxUAtgyXkGlQ/SLoIuJfizOMhiia/dwITu32DajpJvwLea/sXbWWvAL5m+x8kbQBcbftltQU5QJJ2omiG/RXQ/mXiC7Z3rCeqtSfpKGAjioPDo22rzre9Sz1RDY6kLfpY1ZP7JGkW8FKKWRTaf0dTbG9aS1CDJOlRiqT0DeAcYE6d16OToPqhvBh6BvBGirPOJ4H/BT5ou+eG15e0H/DfwAOsSLjPAt5K0eb8A+A62yfWFuQASToVOIaiDf3JtlUTbG9UT1RrT1JfBwV3m3qmqST9mqLZ6NFyn0yXzjm9tE8tkm4DTu0sBj5aR3PYUJD0NxRf9F5DMZPEjcCPbd++2hcOVzxJUP1XtsmOBxb2ei+3MunuD2wO3E9xIXRRvVGtPUl3UMwv9lBH+Zdtf6CmsNaapCttr3INUNIltnummU/S82zfUz7/me29u9TpWt50kl7Xfh23rfzVtq+sI6bBkrS57fvL56OA9wGfAh62/YLK40mCWnu92j22RdJEYCKwwPaCuuMZDEnnANNsP9VR/gbbP6wprLUmabTtJ+qOowqS9rA9q+44hkovHxfKwbw/D7wW2Jeip+9FFGdRP608niSo7vrbPbZHm4+eBZwHtGYzNnAFcLDtrpNDNp2kLwEvB2az8jWoaba3qieqwZF0CPAeyi8RFCP7n1dvVIMj6fnASyiur7WcYHv7mkIaEEkzgKNt/6WPY0TPHhfg6RnNr6dIShfZ/nWd8aQXX99GcvfYMyjug3oXxWzFzwLeDXwFOKjGuAbjIIoOBePKR8uYesIZHEnHA4cA36X4HY0Hjpf0HNufrTW4tSTpvRTXCZ9N0ZllNPAPwJq+DDbJlcBj5fNux4hePi4AnNrZw7dOOYPqg6Sd+tE9do11mkjSLNt79Le8F0j6hO2Tu5QfY7vnDhiSrgX2sr2srWxD4PJe7PEGT+/T7sBlretrkjanuOfwiFqDWwuSXtjtDEPSq2xfVUdMQ0HS3wKvZ8WZ+49sP7r6Vw2PnEH1oTPxSNqVVZtbrqsjtiGwgaRNbT/cKpC0KbBBjTEN1nskbWz7o+2FvZicSsvbkxOA7cckPdXXC3rA/9l+UtLTwwLZvl9ST/Z4ayWnssNRe5PeqUCvfomYCvwUeIQVZ+6fk7Sv7cpvQE6C6ofyZs9TKYY2mkfxS7tA0kdtf7vW4NbOV4DbJF3Iij/C/YGP1BrV4PypMzn1uDvK8SDPYsXv6N3AbbVGNThPlPer/VrS94CfUVw3XH/1L2smSbtQjOwxkZW7zvdys9RngANtX9MqKL+cn04NI8tkNPP+eR/wItvTbX/M9nSKtvOjao6r3yRdKekKSVNtnw28GVgOvBh4Cnhzj1+Av1rSKt1gJV1cRzBD4GiKA91lFEnpUorf0zF1BjVIx1McyE+guLH1AxT32hxSZ1CD8O/Aq4CrbK9H0QLxBuDfao1qcNSenABsX8uq965VImdQ/fNE5z1CtheVPV56hu09ASR9k/Jbnu39aw1q6LwMOKq8H2pJW/mUmuIZFNv/Bxwu6QhW3HtnSbUcKNaWpPNsHwxg+9a2VdPa6wAHVxzaUHjc9j3l/UKUtwVcWI4C0rMkvdL21W3Lu1HTWWESVP8slPRJVm5umUYxakGvaP8D+1eKbuZvrymW4TCB4ht5O1HciNyzXPRiav87u5wVtwf0gv0lranDwIsqiWTorSdpLPCwpGOASyiaLCu/oXUIHQf8RNISVhzrNqG4J6pySVD9cxRwLnASKw70l1GMx9crnv7mbXu+pL92DnIpaS/bl1cf2pA4vo+7+u+pI5i1UTZHvsX20rZhgVaq0qWs6R6hSKot01gxjQ0U+zSxwniG0n9RXLv9OPAjius0iylGou9Jtm8o71V7HfBc4D6Km3SXrv6VwyPdzPsg6RlA6+bBW8rmledQ/DONsn19fdENnKQrKb55txLV5R3LUHRh7qUpNlbSpO6xa0PSrmV7P5KuZ9UzXAHfsf2KyoNbS53d/LsN4dSrtwJ0kjSeonn5ml4aaLk/JB1t+wuVbzcJqruy596ZwAXAYbb/2rbuTOCFwP7tXbWbrMs38q7fxntx0E7os3vsWIrZTXtufh5Jf2/7vo6yNwE/t/2HmsIaNElXtK6F9qLyXrTjgKkUX4JOLrvKjwOOpJg/6W9sP7PGMNeaGjZLcJr4+nYo8Hrbq/QCs32EpPdQzHj63sojWzu/YuSOjAEN6x47BL7FqteaNqTo1vyq6sNZO5I26+hgtEonjy51muxrwD7A1RTXN8+QdBlFs95Siok+/6u+8AatUbMEJ0H1bUy35NRi+5tlkuoVx9mevboKko6rKphh0LV7bA/2emvNmTRG0t+z8gH9l8DfVR/VoPyPpJWaksvfSft+/Q+98yViZ2Cy7cXw9O/rNuA/gX+1/djqXtwDGjVLcBJU355ccxV6ZsoN2z8bijpN1qTusYMwnxUxd3bwWAp8udJoBm93VndtTfwAAAiGSURBVP5fEv3732qq+1vJCcD2vZJ+Y/uEVpmkya5p/qQhMFsNmiU4CapvT0ra0vbvu62UtBXFjZPRDI3qHru2yhs+kfSTEXKP2khrWl7e5Qzwzx1lX6F3zgg77Qx8oLyfcKVZgiluT6lUOkn0QdLuFN1hT6HoUt66MP1cijboE4F3tXpdRf3KXnyvpejFV2v32OEi6e22z687jv6StPeazsz7U6cp+tv9v4c7GzVqluAkqNWQtC9Fk8qkjlW/BY5c0zWdqI6k99j+ZkfZtymSVM8c0NtJejXwUlYeiLRn57caCSTdSD/OCG2/pKKQhpQaNktwEtQalKfuOwLPL4vuAm50PrhG6dZ9WdJGwBW2d64prLVWjlyyBzCZYoSC0cBuwN2296oxtHXaSDsjXB1Jkyg65dzefptNpTHkOBu9rDyQw6ojFEBx5vGGOpomBkvS1bZf2X5jq6S/Ab5qu5dGMImGK8cOPBI41/any7KvUNxCs4jiWvsbOqcgqkI6SUSv27L8uWHb85alFHN49aK/lD/Xl7S+7cfLsh1qjClGprcAH2pdspD0KorktKftq8rr8adRwxiQSVDR02y/B0DSh2z/Z93xDKGlkv4RuA64qhxwdSeKkTIihtJTHdfTpwGzXc4KbHt2a8T2qmU+qBgR2pOTpEmSXiypl2cI/hBwN8VApBdTnDn9it6cliKa7ekTlfK67YEUg2O3q+XetSSo6FmSjpL0K0nHt5V9haIjy6XA71XM4NqLJti+y/ZfbZ9U3t3/O2DTugOLEWexpCMlTabotfwkxZBawNPjXNaSK5Kgope12s5bF3bb284nAO+gaDvvRad3KbsZ+EbVgcSI9yHgbcBciok/39YasknSGRRzx327jsByDSp6WWPbztdWmWQBxkp6JSuPWLAhvTcWXzRcOaxR1w4QtmudHTgJKnpZt7bzYzvq9Nq4b98qfz4bOLtj3aPAGdWGE1GfJKjoZYslHQnMBj5Kg9rO15btLQEknWX7sLrjiahTT/3zRnRobNv5YPWVnCSdWHUsEXXJSBIRDSHpUOA828slndVHtVpmNo2oQ86gIppjV2BM+fzVFPNBdT5qGRMtog65BhXRELbf37b4CdurNE9KurvCkCJqlSa+iIaS9EzgAGBz4H7gItsZ6ijWGUlQEQ0kaR/gf4AHKUaUHlc+DrJ9WZ2xRVQlCSqigSTdBLzf9vVtZbsBX7b94voii6hOOklENNPi9uQEYPsa4OGa4omoXBJURDNdI2nf9gJJ+1EMgtta/m7lUUVUKE18EQ0k6ffA31NMuvgI8EyKGYL/0KpCMeL5RvVEGDH80s08opkWUwx+2xcBn68mlIh65AwqooEk7WT7hi7lm9u+f3V1IkaKJKiIBpK0RR+rzre9S6XBRNQkCSqiIST9GtjF9qOSlgNm5fmgAGy7p+a4ilhb6cUX0RwH2H60fH6F7VG212t/AFfWGWBElXIGFRERjZQzqIgGkvR+SbdIekW5PEXSdZK2qzu2iKrkDCqigSRdA7zD9n1tZVsBM2zvXV9kEdXJGVREMz3enpwAbP8OSAeJWGckQUU0k8rBYdsLduurcsRIlCa+iAaStCNwMfAosBAYD2xCMeX7jXXGFlGVJKiIhpL0t8DrgOcC9wE/tr203qgiqpMEFdFgkiYAE4BbgPVsP1VzSBGVyTWoiAaStLmkyyimev8+8HfAPEk71RtZRHWSoCKa6avA94DNgHttLwb2Af6j1qgiKpTpNiKa6W9tfwVAkgFsPygpXypjnZE/9ohm2kDS1u0F5Qjn69cUT0TlcgYV0Uz/CvxS0g3AdpIuBF4OHFxvWBHVSS++iIaSNIkiIU2k6Gb+HdvzawwpolI5g4poIElXAD+3fWLdsUTUJWdQEQ0k6UbbL6k7jog6pZNERDPdIGnTzkJJM2uIJaIWaeKLaKZNgF9Luh5Y0la+T03xRFQuCSqimXYGvtalfFnVgUTUJQkqokEk7QXsS3EGdZHtuR3r76wlsIgapJNERENI+iDwWYqBYUcD2wEH2f5RrYFF1CSdJCKaYzrwEttTbb+Y4nrTMTXHFFGbJKiI5lhs+9bWgu1ZdDTDS3pl1UFF1CUJKqI5nlRhvdajswz4VN1BRlQl16AiGkLScqDzH1KdZbZHVRZURI3Siy+iOX4FHL2a9QI+X1EsEbXLGVREQ0ja2/bPBlsnYqRIgoqIiEZKJ4mIiGikJKiIiGikdJKIqIik24EHysXJFJ0ebiuXn217ci2BRTRUElREdR6wvQc8PW3GM2wfWi7Pqi+siGZKE19EdVY3O25mzo3okAQVURHb13crl7QhMEPSY5LOLMuOk/SQpNMlfU/SMkmfknSxpFskfV7SqLb3OE7SHElXS/qSpPUr2q2IYZMEFVEz248Be1H8P55UFn8RuMr2sbbfRHHtahtgP2AnYE/gcABJhwCHlWWvAiYAx1e4CxHDIgkqogFsPwhcBhxaFh0A/LSj2nddWAb8D/D2snwacL7tv7i4sfE7wDuHP+qI4ZVOEhHNcTbwL8CpwFuB93esf6Tt+SJg8/L5ROBgSa8ul8cAy4cxzohKJEFFNMePgK9J+kdgue3FHes3bXs+Dri/fH4fcJntz7RWSho3rJFGVCBNfBENUTbdXQB8E/hulypvLqfe2BB4C3B+WT4TeIukMQDlmdTXhj/iiOGVM6iIikn6NLBv8VSftt3eoeFs4E2sev0JiqngfwL8PfAz4BsAts+TtDkwW9KfgUcpZueN6GkZLDaiQSRtB/yT7Q90lM8HppWz7EasE9LEF9EAkt4h6RkUPfK+VXM4EY2QBBXRDDsCNwLjbP+ifYWk7wHPBr4g6aV1BBdRhzTxRUREI+UMKiIiGikJKiIiGikJKiIiGikJKiIiGikJKiIiGun/AxeBjHCSKsUiAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "from matplotlib import rcParams\n",
    "import matplotlib.pyplot as plt\n",
    "ax = df_weather_type.plot(x=\"Type\", y=\"count\", kind=\"bar\")\n",
    "ax.set_xlabel(\"Type\", fontsize=12)\n",
    "ax.set_ylabel(\"Count\", fontsize=12)\n",
    "ax.set_title(\"Weather Type Count\")\n",
    "rcParams.update({'figure.autolayout': True})\n",
    "plt.rc('xtick', labelsize=12) \n",
    "plt.rc('ytick', labelsize=12) \n",
    "plt.rc('font', family='serif')\n",
    "plt.tight_layout()\n",
    "plt.savefig('/Users/pprusty05/google_drive/Data_Mining/Project/plots/Weather_Type.pdf')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Merge Two dataset"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## rename traffic data columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "column_name = df_traffic.columns\n",
    "for col in column_name:\n",
    "    df_traffic = df_traffic.withColumnRenamed(col, 'TR_' + col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- TR_EventId: string (nullable = true)\n",
      " |-- TR_Type: string (nullable = true)\n",
      " |-- TR_Severity: string (nullable = true)\n",
      " |-- TR_TMC: integer (nullable = true)\n",
      " |-- TR_StartTime: timestamp (nullable = true)\n",
      " |-- TR_EndTime: timestamp (nullable = true)\n",
      " |-- TR_LocationLat: double (nullable = true)\n",
      " |-- TR_LocationLng: double (nullable = true)\n",
      " |-- TR_Distance: double (nullable = true)\n",
      " |-- TR_AirportCode: string (nullable = true)\n",
      " |-- TR_Street: string (nullable = true)\n",
      " |-- TR_Side: string (nullable = true)\n",
      " |-- TR_City: string (nullable = true)\n",
      " |-- TR_County: string (nullable = true)\n",
      " |-- TR_State: string (nullable = true)\n",
      " |-- TR_ZipCode: integer (nullable = true)\n",
      " |-- TR_StartDate: date (nullable = true)\n",
      " |-- TR_EndDate: date (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df_traffic.printSchema()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Rename weather data columns"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "column_name = df_weather.columns\n",
    "for col in column_name:\n",
    "    df_weather = df_weather.withColumnRenamed(col, 'WR_' + col)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- WR_EventId: string (nullable = true)\n",
      " |-- WR_Type: string (nullable = true)\n",
      " |-- WR_Severity: string (nullable = true)\n",
      " |-- WR_StartTime: timestamp (nullable = true)\n",
      " |-- WR_EndTime: timestamp (nullable = true)\n",
      " |-- WR_TimeZone: string (nullable = true)\n",
      " |-- WR_LocationLat: double (nullable = true)\n",
      " |-- WR_LocationLng: double (nullable = true)\n",
      " |-- WR_AirportCode: string (nullable = true)\n",
      " |-- WR_City: string (nullable = true)\n",
      " |-- WR_County: string (nullable = true)\n",
      " |-- WR_State: string (nullable = true)\n",
      " |-- WR_ZipCode: integer (nullable = true)\n",
      " |-- WR_StartDate: date (nullable = true)\n",
      " |-- WR_EndDate: date (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "df_weather.printSchema()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inner join based on start date and zipcode"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "inner_join = df_traffic.join(df_weather, (df_traffic.TR_ZipCode == df_weather.WR_ZipCode) &\n",
    "                             (df_traffic.TR_StartDate == df_weather.WR_StartDate))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "root\n",
      " |-- TR_EventId: string (nullable = true)\n",
      " |-- TR_Type: string (nullable = true)\n",
      " |-- TR_Severity: string (nullable = true)\n",
      " |-- TR_TMC: integer (nullable = true)\n",
      " |-- TR_StartTime: timestamp (nullable = true)\n",
      " |-- TR_EndTime: timestamp (nullable = true)\n",
      " |-- TR_LocationLat: double (nullable = true)\n",
      " |-- TR_LocationLng: double (nullable = true)\n",
      " |-- TR_Distance: double (nullable = true)\n",
      " |-- TR_AirportCode: string (nullable = true)\n",
      " |-- TR_Street: string (nullable = true)\n",
      " |-- TR_Side: string (nullable = true)\n",
      " |-- TR_City: string (nullable = true)\n",
      " |-- TR_County: string (nullable = true)\n",
      " |-- TR_State: string (nullable = true)\n",
      " |-- TR_ZipCode: integer (nullable = true)\n",
      " |-- TR_StartDate: date (nullable = true)\n",
      " |-- TR_EndDate: date (nullable = true)\n",
      " |-- WR_EventId: string (nullable = true)\n",
      " |-- WR_Type: string (nullable = true)\n",
      " |-- WR_Severity: string (nullable = true)\n",
      " |-- WR_StartTime: timestamp (nullable = true)\n",
      " |-- WR_EndTime: timestamp (nullable = true)\n",
      " |-- WR_TimeZone: string (nullable = true)\n",
      " |-- WR_LocationLat: double (nullable = true)\n",
      " |-- WR_LocationLng: double (nullable = true)\n",
      " |-- WR_AirportCode: string (nullable = true)\n",
      " |-- WR_City: string (nullable = true)\n",
      " |-- WR_County: string (nullable = true)\n",
      " |-- WR_State: string (nullable = true)\n",
      " |-- WR_ZipCode: integer (nullable = true)\n",
      " |-- WR_StartDate: date (nullable = true)\n",
      " |-- WR_EndDate: date (nullable = true)\n",
      "\n"
     ]
    }
   ],
   "source": [
    "inner_join.printSchema()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>count</th>\n",
       "      <th>percent</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>TR_EventId</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_Type</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_Severity</td>\n",
       "      <td>712913.0</td>\n",
       "      <td>12.853749</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_TMC</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_StartTime</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_EndTime</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_LocationLat</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_LocationLng</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_Distance</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_AirportCode</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_Street</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_Side</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_City</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_County</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_State</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_ZipCode</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_StartDate</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>TR_EndDate</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>WR_EventId</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>WR_Type</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>WR_Severity</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>WR_StartTime</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>WR_EndTime</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>WR_TimeZone</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>WR_LocationLat</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>WR_LocationLng</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>WR_AirportCode</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>WR_City</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>WR_County</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>WR_State</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>WR_ZipCode</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>WR_StartDate</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>WR_EndDate</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                   count    percent\n",
       "TR_EventId           0.0   0.000000\n",
       "TR_Type              0.0   0.000000\n",
       "TR_Severity     712913.0  12.853749\n",
       "TR_TMC               0.0   0.000000\n",
       "TR_StartTime         0.0   0.000000\n",
       "TR_EndTime           0.0   0.000000\n",
       "TR_LocationLat       0.0   0.000000\n",
       "TR_LocationLng       0.0   0.000000\n",
       "TR_Distance          0.0   0.000000\n",
       "TR_AirportCode       0.0   0.000000\n",
       "TR_Street            0.0   0.000000\n",
       "TR_Side              0.0   0.000000\n",
       "TR_City              0.0   0.000000\n",
       "TR_County            0.0   0.000000\n",
       "TR_State             0.0   0.000000\n",
       "TR_ZipCode           0.0   0.000000\n",
       "TR_StartDate         0.0   0.000000\n",
       "TR_EndDate           0.0   0.000000\n",
       "WR_EventId           0.0   0.000000\n",
       "WR_Type              0.0   0.000000\n",
       "WR_Severity          0.0   0.000000\n",
       "WR_StartTime         0.0   0.000000\n",
       "WR_EndTime           0.0   0.000000\n",
       "WR_TimeZone          0.0   0.000000\n",
       "WR_LocationLat       0.0   0.000000\n",
       "WR_LocationLng       0.0   0.000000\n",
       "WR_AirportCode       0.0   0.000000\n",
       "WR_City              0.0   0.000000\n",
       "WR_County            0.0   0.000000\n",
       "WR_State             0.0   0.000000\n",
       "WR_ZipCode           0.0   0.000000\n",
       "WR_StartDate         0.0   0.000000\n",
       "WR_EndDate           0.0   0.000000"
      ]
     },
     "execution_count": 39,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "missing_value_weather_pd=find_missing_values(inner_join)\n",
    "missing_value_weather_pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "inner_join.write.parquet(\"/Users/pprusty05/workspace/Data_mining/Project/TrafficWeatherMerged.parquet\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "5546343"
      ]
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "inner_join.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "4328353"
      ]
     },
     "execution_count": 41,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_weather.count()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "25073666"
      ]
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df_traffic.count()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
